'''
Created on Sep 27, 2011

@author: oabalbin
'''
from collections import defaultdict


def dict_read_table(file):
    
    ifile = open(file)
    itable = defaultdict()
    for l in ifile:
        if l.startswith('#'):
            continue
        f = l.strip('\n').split('\t')
        itable[f[0]+'@'+f[1]] = f
    
    return itable

def read_isec_hapmap(file):
    ifile = open(file)
    itable = defaultdict()
    for l in ifile:
        f = l.strip('\n').split('\t')
        chr, snp, ft = f[0],str(int(f[1])+1), f[3].split('|')
        rsID,freq=ft[0],ft[2]
        itable[chr+'@'+snp] = [rsID,freq]
    
    return itable


def read_polyphen2_output(file_name):
    '''
    '''
    inputfile=open(file_name)
    pp2_sites=defaultdict()
    header=inputfile.next()
    for l in inputfile:
        if l.startswith('#'):
            continue
        fields=l.replace(' ','').strip('\t\n').split('\t')
        
        #print fields
        o_snp_id=fields[0].split('.')[0]
        POS=o_snp_id.replace(':','@')
        prot_name=fields[2]
        prediction=fields[6]
        pp2_sites[POS]=fields[1:]
    header=header.replace(' ','').strip('\t\n').split('\t')[1:]
    
    return pp2_sites,header

    
def summarizer(ofile,table, isec_hapmap, isec_polyphen):
    
    ofile = open(ofile,'w')
    ctable = dict_read_table(table)
    chapmap = read_isec_hapmap(isec_hapmap)
    pp2_prediction,headpp2=read_polyphen2_output(isec_polyphen)
    pp2tabs = ['NA']*len(headpp2)
    print headpp2
    h = open(table)
    header = h.next()
    header = header.strip("\n").split('\t')+ ["rsID","FREQ"]
    print header
    hd =header+headpp2
    ofile.write(",".join(hd).replace(',','\t')+'\n')

    for s, info in ctable.iteritems():
        try:
            ft = chapmap[s]
            ol = info + ft 
        except:
            ol = info + ['NA','NA']
        print ol
        try:
            pp = pp2_prediction[s]
            ol += pp
        except:
            ol +=pp2tabs
        print ol
        
        ofile.write(",".join(ol).replace(',','\t')+'\n')

            

table = '/exds/users/oabalbin/projects/exomes/nunez2/sam_calls_quick/nunez.summary.snps.annot_isec_exomePad50h2.txt'
isec_hapmap='/exds/users/oabalbin/projects/exomes/nunez2/sam_calls_quick/nunez.summary.snps.annot_isec_exomePad50h.EUR_allele_freq.bed'
ofile = '/exds/users/oabalbin/projects/exomes/nunez2/sam_calls_quick/nunez.summaryzer.output.txt'
polyphen2='/exds/users/oabalbin/projects/exomes/nunez2/pph2-short.txt'
summarizer(ofile, table, isec_hapmap, polyphen2)

#read_polyphen2_output(polyphen2)



    
    